#!/usr/bin/env python3
#coding=utf-8

import math
from urllib.parse import quote

from ltmex.ltmgrab import grabHtml
from ltmex.ltmgrab import defaultHeader



# Percent-encoded form of a space character, used to join search terms in a URL.
quoteSpace = '%20'

# Regular expression matching Baidu result links (the href value is captured).
# Written as a raw string so that "\?" is not treated as an (invalid) string
# escape sequence; the pattern text itself is unchanged.
patBaiduResult = r'href="(https*://www.baidu.com/link\?[^"]+?)"'


def baidu(words=None, rn=50, pn=0):
    """Request one page of Baidu search results through ``grabHtml``.

    Args:
        words: Iterable of search terms. Each term is percent-encoded and the
            terms are joined with ``quoteSpace`` to build the ``wd`` query
            value.
        rn: Number of results requested per page.
        pn: Index of the first result to request.

    Returns:
        An empty list when no search terms are given; otherwise whatever
        ``grabHtml`` returns for ``patBaiduResult`` applied to the result
        page (presumably the list of captured links -- confirm against
        ``grabHtml``).
    """
    if not words:
        return []

    # Encode every term so that characters such as "&", "#", embedded spaces
    # or non-ASCII text cannot corrupt the query string.
    wd = quoteSpace.join(quote(str(word), safe="") for word in words)

    url = "https://www.baidu.com/s?wd=%s&rn=%s&pn=%s" % (wd, rn, pn)
    return grabHtml(pattern=patBaiduResult, url=url, headers=defaultHeader)
        

def searchBaidu(words=None, num=100):
    """Gather Baidu results for ``words`` by requesting successive pages.

    Pages of at most 50 results are requested through ``baidu`` until ``num``
    results have been asked for. The final page requests only the remaining
    count, so the total requested equals ``num`` instead of being rounded up
    to the next multiple of 50.

    Args:
        words: Iterable of search terms, passed unchanged to ``baidu``.
        num: Total number of results to request.

    Returns:
        A list with the items of every non-empty page, in page order. The
        list is empty when ``words`` is empty or ``num`` is not positive.
    """
    if not words:
        return []

    # 50 is the page size the paging scheme has always used
    # (presumably Baidu's per-page maximum -- confirm).
    pageSize = 50
    total = math.ceil(num)  # tolerate a fractional num, as math.ceil(num / 50) did

    results = []
    for start in range(0, total, pageSize):
        page = baidu(words, rn=min(pageSize, total - start), pn=start)
        # A page may come back empty or falsy; skip it and keep paging.
        if page:
            results.extend(page)
    return results
    
    
    











    
    
